Description: This project shows an example of customized scripts for linear mixed effects modeling, matrix bootstrapping, and convex hull analysis of response surface volumes in R. Data are from Van Nuland et al. (2020) "Symbiotic niche mapping reveals functional specialization by two ectomycorrhizal fungi that expands the host plant niche" in Fungal Ecology.
This experiment tested how two different species of microbial root symbionts fungi (mycorrhizal fungi) influence pine tree responses to soil nutrient limitation. I hypothesized that the fungi would provide greater benefits to the plants under increasing nutrient stress, but that the extent of these positive growth effects would differ depending on the specific fungal species and type of nutrient stress (e.g., nitrogen vs. phosphorus limitation).
See more details on the full project and published paper here: https://doi.org/10.1016/j.funeco.2020.100960.
The dataset for this example can be found here: https://github.com/mvannuland/DataSciPorfolio_datasets
1 - Setup
2.1 - Hypothesis testing with linear mixed effects model
2.2 - Partial linear regressions
3 - Plot.ly 3-D response surface graphs
4 - Bootstrapped convex hull volume analysis and density plot
Load the relevant R libraries and project dataset.
# Libraries for data wrangling and analysis
library(lme4)
library(lmerTest)
library(geometry)
library(fields)
library(reshape2)
# Libraries for plotting
library(ggplot2)
library(ggpubr)
library(manipulateWidget)
library(plotly)
library(viridis)
library(webshot)
theme_set(theme_bw())
# Load dataset
pinus.myc.dat <- readRDS(file="pinus.myc.rds")
# Subset data by fungal treatments
Control <- subset(pinus.myc.dat, pinus.myc.dat$MYC=="Control")
Fungi1 <- subset(pinus.myc.dat, pinus.myc.dat$MYC=="Fungi1")
Fungi2 <- subset(pinus.myc.dat, pinus.myc.dat$MYC=="Fungi2")
Mixed <- subset(pinus.myc.dat, pinus.myc.dat$MYC=="Mixed (F1+F2)")
# Remove NAs
pinus.myc.dat <- na.exclude(pinus.myc.dat)
This model tests whether the mycorrhizal fungi treatments (MYC), nutrient fertilization treatments (Nitrogen, Phosphorus), and/or their interactions affect total plant biomass. The experiment was created with randomized blocks (REP) which are included as random effects to account for any potential variation in the room where plants were growing that might have unintentionally altered their growth.
# Test lmer model
TotalBiomass.mod <- lmer(log(TotalBiomass) ~ MYC * log(Nitrogen) * log(Phosphorus) + (1|REP), data = na.exclude(pinus.myc.dat))
anova(TotalBiomass.mod)
One big take-away from the results is in the MYC x P interaction term, which shows that plant growth responses to phosphorus fertilization depend on the mycorrhizal treatments (evidence that supports one of the main hypotheses in this project). We can explore these patterns further with partial regressions, which is one way to isolate single variable effects while accounting for the variation attributed to other variables using residuals.
# Calculate residuals for Nitrogen and Phosphorus effects (isolate variation explained by Nitrogen vs. Phosphorus nutrient treatments)
TotalBiomass.mod.N.resids <- lm(log(TotalBiomass) ~ log(Nitrogen), data = pinus.myc.dat)
TotalBiomass.mod.P.resids <- lm(log(TotalBiomass) ~ log(Phosphorus), data = pinus.myc.dat)
N.resids <- resid(TotalBiomass.mod.N.resids)
P.resids <- resid(TotalBiomass.mod.P.resids)
pinus.myc.dat <- cbind.data.frame(pinus.myc.dat, N.resids, P.resids)
# Plot partial regressions using residuals
N.plot <- ggplot(aes(x=log(Nitrogen), y=P.resids), data = pinus.myc.dat) +
geom_point(position = position_jitterdodge(dodge.width = 0.3, jitter.width = 0.1), size = 1.5, aes(color = MYC), show.legend = T) +
geom_smooth(method = "loess", se=F, lwd=1, show.legend = F, aes(color = MYC), formula = 'y~x') +
scale_colour_viridis_d("Fungal treatment", direction = -1) +
xlab("Log Nitrogen (mg/kg soil)") +
ylab("Total Plant Biomass (residuals)") +
ylim(-1.5, 1.2) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
P.plot <- ggplot(aes(x=log(Phosphorus), y=N.resids), data = pinus.myc.dat) +
geom_point(position = position_jitterdodge(dodge.width = 0.3, jitter.width = 0.1), size = 1.5, aes(color = MYC), show.legend = F) +
geom_smooth(method = "loess", se=F, lwd=1, show.legend = F, aes(color = MYC), formula = 'y~x') +
scale_colour_viridis_d("Fungal treatment", direction = -1) +
xlab("Log Phosphorus (mg/kg soil)") +
ylab("Total Plant Biomass (residuals)") +
ylim(-1.5, 1.2) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggarrange(N.plot, P.plot, ncol=2, common.legend = TRUE, legend = "bottom")
Here, I show how you can use Plot.ly response surface graphs to visualize plant growth simultaneously across the two-dimensional axes of critical soil resources (nitrogen and phosphorus). These graphs are interactive - spin them and zoom them for a better look at the results!
# Create & smooth 7x7 matrices of total biomass responses across N and P gradients
Control.mat <- acast(Control, Nitrogen~Phosphorus, value.var = "TotalBiomass", mean, drop=TRUE)
Fungi1.mat <- acast(Fungi1, Nitrogen~Phosphorus, value.var = "TotalBiomass", mean, drop=TRUE)
Fungi2.mat <- acast(Fungi2, Nitrogen~Phosphorus, value.var = "TotalBiomass", mean, drop=TRUE)
Mixed.mat <- acast(Mixed, Nitrogen~Phosphorus, value.var = "TotalBiomass", mean, drop=TRUE)
# Smooths over missing matrix values
Control.mat.smooth <- image.smooth(Control.mat, theta=1)
Fungi1.mat.smooth <- image.smooth(Fungi1.mat, theta=1)
Fungi2.mat.smooth <- image.smooth(Fungi2.mat, theta=1)
Mixed.mat.smooth <- image.smooth(Mixed.mat, theta=1)
# Set same Z-axes limits across plots for easier comparison
axz <- list(range = c(180, 1180), title=list(text="Plant biomass"))
# Response surface plotting
# Control
Control.surface <-
plot_ly( z = ~Control.mat.smooth$z) %>%
add_surface(opacity = 0.95) %>%
hide_colorbar() %>%
layout(
title=list(text="Control", y = 0.90, font=list(color="black", size=20)),
scene=list(
aspectmode = 'cube',
xaxis=list(title="Phosphorus", autorange = "reversed"),
yaxis=list(title="Nitrogen", autorange = "reversed"),
zaxis=axz))
# Fungi1
Fungi1.surface <-
plot_ly( z = ~Fungi1.mat.smooth$z) %>%
add_surface(opacity = 0.95) %>%
hide_colorbar() %>%
layout(
title=list(text="Fungi 1", y = 0.90, font=list(color="black", size=20)),
scene=list(
aspectmode = 'cube',
xaxis=list(title="Phosphorus", autorange = "reversed"),
yaxis=list(title="Nitrogen", autorange = "reversed"),
zaxis=axz))
# Fungi2
Fungi2.surface <-
plot_ly( z = ~Fungi2.mat.smooth$z) %>%
add_surface(opacity = 0.95) %>%
hide_colorbar() %>%
layout(
title=list(text="Fungi 2", y = 0.90, font=list(color="black", size=20)),
scene=list(
aspectmode = 'cube',
xaxis=list(title="Phosphorus", autorange = "reversed"),
yaxis=list(title="Nitrogen", autorange = "reversed"),
zaxis=axz))
# Mixed (F1 + F2)
Mixed.surface <-
plot_ly( z = ~Mixed.mat.smooth$z) %>%
add_surface(opacity = 0.95) %>%
hide_colorbar() %>%
layout(
title=list(text="Mixed (F1 + F2)", y = 0.90, font=list(color="black", size=20)),
scene=list(
aspectmode = 'cube',
xaxis=list(title="Phosphorus", autorange = "reversed"),
yaxis=list(title="Nitrogen", autorange = "reversed"),
zaxis=axz))
widg <- combineWidgets(Control.surface, Fungi1.surface, Fungi2.surface, Mixed.surface, ncol = 2, nrow=2)
widg
I used a randomized subsampling and bootstrapping approach to create null distributions and calculate error estimates for convex hull volumes of the plant growth response surfaces to the nutrient treatments (above). This allowed me to compare how different fungal symbiont treatments affected the overall size and shape of two-dimensional plant responses to nutrient limitation.
set.seed(1324)
# Control
control.mat.mean.hull.vols <- NA
for(i in 1:1000){
control_matrix_list <- list()
volume_output_control <- NA
for(j in 1:100){
original.matrix<-Control
random.number<-rnorm(length(original.matrix$Nitrogen),0,1)
sample.order<-order(random.number)
resampled.matrix<-original.matrix[sample.order[1:round(length(sample.order)*0.5)],]
sampled.matrix <- acast(resampled.matrix, Nitrogen~Phosphorus, value.var = "TotalBiomass", mean, drop=TRUE)
sampled.mat.smooth<- image.smooth(sampled.matrix, theta=1)
sampled.hull <- convhulln(melt(sampled.mat.smooth$z), output.options=TRUE)
volume_output_control[j]<-sampled.hull$vol
control_matrix_list[[j]] <- sampled.mat.smooth$z
}
control.ids.to.remove <- sapply(control_matrix_list, function(i) length(i) < 49)
control_matrix_list <- control_matrix_list[!control.ids.to.remove]
control.mat.mean <- Reduce("+", control_matrix_list) / length(control_matrix_list)
control.mat.mean.hull <- convhulln(melt(control.mat.mean), output.options=TRUE)
control.mat.mean.hull.vols[i] <- control.mat.mean.hull$vol
}
# Fungi 1
Fungi1.mat.mean.hull.vols <- NA
for(i in 1:1000){
Fungi1_matrix_list <- list()
volume_output_Fungi1 <- NA
for(j in 1:100){
original.matrix<-Fungi1
random.number<-rnorm(length(original.matrix$Nitrogen),0,1)
sample.order<-order(random.number)
resampled.matrix<-original.matrix[sample.order[1:round(length(sample.order)*0.5)],]
sampled.matrix <- acast(resampled.matrix, Nitrogen~Phosphorus, value.var = "TotalBiomass", mean, drop=TRUE)
sampled.mat.smooth<- image.smooth(sampled.matrix, theta=1) #smooths over missing matrix values
sampled.hull <- convhulln(melt(sampled.mat.smooth$z), output.options=TRUE)
volume_output_Fungi1[j]<-sampled.hull$vol
Fungi1_matrix_list[[j]] <- sampled.mat.smooth$z
}
Fungi1.ids.to.remove <- sapply(Fungi1_matrix_list, function(i) length(i) < 49)
Fungi1_matrix_list <- Fungi1_matrix_list[!Fungi1.ids.to.remove]
Fungi1.mat.mean <- Reduce("+", Fungi1_matrix_list) / length(Fungi1_matrix_list) # Calculate means by x/y elements
Fungi1.mat.mean.hull <- convhulln(melt(Fungi1.mat.mean), output.options=TRUE)
Fungi1.mat.mean.hull.vols[i] <- Fungi1.mat.mean.hull$vol
}
# Fungi 2
Fungi2.mat.mean.hull.vols <- NA
for(i in 1:1000){
Fungi2_matrix_list <- list()
volume_output_Fungi2 <- NA
for(j in 1:100){
original.matrix<-Fungi2
random.number<-rnorm(length(original.matrix$Nitrogen),0,1)
sample.order<-order(random.number)
resampled.matrix<-original.matrix[sample.order[1:round(length(sample.order)*0.5)],]
sampled.matrix <- acast(resampled.matrix, Nitrogen~Phosphorus, value.var = "TotalBiomass", mean, drop=TRUE)
sampled.mat.smooth<- image.smooth(sampled.matrix, theta=1) #smooths over missing matrix values
sampled.hull <- convhulln(melt(sampled.mat.smooth$z), output.options=TRUE)
volume_output_Fungi2[j]<-sampled.hull$vol
Fungi2_matrix_list[[j]] <- sampled.mat.smooth$z
}
Fungi2.ids.to.remove <- sapply(Fungi2_matrix_list, function(i) length(i) < 49)
Fungi2_matrix_list <- Fungi2_matrix_list[!Fungi2.ids.to.remove]
Fungi2.mat.mean <- Reduce("+", Fungi2_matrix_list) / length(Fungi2_matrix_list) # Calculate means by x/y elements
Fungi2.mat.mean.hull <- convhulln(melt(Fungi2.mat.mean), output.options=TRUE)
Fungi2.mat.mean.hull.vols[i] <- Fungi2.mat.mean.hull$vol
}
# Mixed (F1+F2)
Mixed.mat.mean.hull.vols <- NA
for(i in 1:1000){
Mixed_matrix_list <- list()
volume_output_Mixed <- NA
for(j in 1:100){
original.matrix<-Mixed
random.number<-rnorm(length(original.matrix$Nitrogen),0,1)
sample.order<-order(random.number)
resampled.matrix<-original.matrix[sample.order[1:round(length(sample.order)*0.5)],]
sampled.matrix <- acast(resampled.matrix, Nitrogen~Phosphorus, value.var = "TotalBiomass", mean, drop=TRUE)
sampled.mat.smooth<- image.smooth(sampled.matrix, theta=1) #smooths over missing matrix values
sampled.hull <- convhulln(melt(sampled.mat.smooth$z), output.options=TRUE)
volume_output_Mixed[j]<-sampled.hull$vol
Mixed_matrix_list[[j]] <- sampled.mat.smooth$z
}
Mixed.ids.to.remove <- sapply(Mixed_matrix_list, function(i) length(i) < 49)
Mixed_matrix_list <- Mixed_matrix_list[!Mixed.ids.to.remove]
Mixed.mat.mean <- Reduce("+", Mixed_matrix_list) / length(Mixed_matrix_list) # Calculate means by x/y elements
Mixed.mat.mean.hull <- convhulln(melt(Mixed.mat.mean), output.options=TRUE)
Mixed.mat.mean.hull.vols[i] <- Mixed.mat.mean.hull$vol
}
# Combine density distributions
vol.dat <- data.frame(vols = c(control.mat.mean.hull.vols,
Fungi1.mat.mean.hull.vols,
Fungi2.mat.mean.hull.vols,
Mixed.mat.mean.hull.vols),
trts = rep(c("Control", "Fungi1", "Fungi2", "Mixed"), each = 1000))
# Testing whether the bootstrapped distributions are significantly different between fungal treatments
# anova(lm(vols ~ trts, data = vol.dat))
# Plotting histogram of volume distributions
ggplot() +
geom_histogram(data = subset(vol.dat, trts=="Control"),
aes(x = vols), alpha = 0.75, color = "#F3DE23", fill="#F3DE23", binwidth=50, na.rm = T) +
geom_vline(xintercept = mean(control.mat.mean.hull.vols), color="#F3DE23", linetype="dashed", lwd=1.25) +
geom_histogram(data = subset(vol.dat, trts=="Fungi1"),
aes(x = vols), alpha = 0.75, color = "#36B779", fill="#36B779", binwidth=50, na.rm = T) +
geom_vline(xintercept = mean(Fungi1.mat.mean.hull.vols), color= "#36B779", linetype="dashed", lwd=1.25) +
geom_histogram(data = subset(vol.dat, trts=="Fungi2"),
aes(x = vols), alpha = 0.75, color = "#31688D", fill="#31688D", binwidth=50, na.rm = T) +
geom_vline(xintercept = mean(Fungi2.mat.mean.hull.vols), color="#31688D", linetype="dashed", lwd=1.25) +
geom_histogram(data = subset(vol.dat, trts=="Mixed"),
aes(x = vols), alpha = 0.75, color = "#440C53", fill="#440C53", binwidth=50, na.rm = T) +
geom_vline(xintercept = mean(Mixed.mat.mean.hull.vols), color="#440C53", linetype="dashed", lwd=1.25) +
ylab("Count") +
xlab("Bootstrapped Convex Hull Volume") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 210)) +
xlim(4200, 7500) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())